home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Skunkware 5
/
Skunkware 5.iso
/
src
/
Tools
/
glimpsehttp
/
news
/
getnews
< prev
next >
Wrap
Text File
|
1995-05-16
|
8KB
|
284 lines
#!/usr/local/bin/perl
#/****************************************************
#**
#** SOURCE NAME | getnews, (Get News)
#** |
#** SYNOPSIS | getnews [-h hostname] [-p port] [-n cfgfile] [-W timeout]
#** |
#** DESCRIPTION | getnews goes to a specified NNTP server
#** | and saves new news articles
#** | into directory ./groups/<newsgroup>/
#** | Please see the NOTES section.
#** |
#** CHANGES | Programmer: Date: Reason/Comments
#** | Jeffrey B. McGough 07-27-92 VERSION 2.0 (pgnews)
#** | Pavel Klark 03-26-94 VERSION 1.0 (getnews)
#** |
#** NOTES | getnews needs a file named getnews.cfg to read
#** | its newsgroup and last message number from.
#** | getnews.cfg format is:
#** | newsgroup number
#** | Example:
#** | comp.unix.wizards 7800
#** | comp.unix.shell 3203
#** | comp.unix.questions 546
#** |
#** | getnews is able to process trn-style kill-file commands:
#** | it locates your kill-file in directory $HOME/News,
#** | (See variable $kill_location below).
#** | and expects all commands to be of format
#** | /pattern/some-commands. Article is killed (doesn't get
#** | archived) if any header line matches the pattern.
#** |
#** AUTHORS | Jeffrey B. McGough mcgough@wrdis01.af.mil
#** | Pavel Clark, paul@cs.arizona.edu
#** |
#****************************************************/
unshift(@INC,'/usr1/paul/lib/perl');
$gzip = "/usr/local/bin/gzip -f";
$GLIMPSEIDX_LOC='/usr/paul/bin/glimpseindex';
# Your kill-file top directory, the following is trn's default
$kill_location = $ENV{'HOME'} . "/News";
require 'sys/socket.ph'; # The way I coded the sockets is this necessary?
require 'getopts.pl';
# -p portnumber : Port to connect to; default 119
# -h host : Server host to connect to
# -n getnews : Name of getnews file; default getnews.cfg
# -W timeout : Timeout wait period for response, sec.; default 900 (= 15min)
$opt_h = $ENV{'NNTPSERVER'};
$opt_h = 'cs.arizona.edu' unless $ENV{'NNTPSERVER'};
$opt_p = 119;
$opt_n = 'getnews.cfg';
$opt_W = 900;
&Getopts ('h:p:n:W:');
$VERSION = '2.0';
$port = $opt_p; # For NNTP
# HOSTNAME for the server...
$host = $opt_h;
# Pack format...
$sockaddr = 'S n a4 x8';
$waittime = $opt_W;
$DOMAIN = &AF_INET;
$STYLE = &SOCK_STREAM;
$newsfile = $opt_n;
$nnewsfile = "${opt_n}.new";
$newarticles = "groups/newarticles";
$rin = $rout = '';
($name, $aliases, $proto) = getprotobyname('tcp');
($name, $aliases, $type, $len, $hostaddr) = gethostbyname($host);
$sock = pack($sockaddr, $DOMAIN, $port, $hostaddr);
socket(S, $DOMAIN, $STYLE, $proto) || die $!;
connect(S, $sock) || die $!;
select(S); $| = 1; select(STDOUT);
#set up for select
vec($rin, fileno(S), 1) = 1;
#this select will block until the server gives us something.
$nfound = select($rout=$rin, undef, undef, $waittime);
if ($nfound == 0)
{
print "Socket timed out...";
exit 1;
}
$_ = <S>; #Read one line to see if we got a good connection.
if (!/^2../)
{
print;
die "Service unavailable";
}
open(GRP, "<$newsfile") || die "Could not open $newsfile: $!";
open(NGRP, ">$nnewsfile") || die "Could not open $nnewsfile: $!";
open(IDXFILE, ">>$newarticles");
$totalcount = 0;
select(NGRP); $| = 1; select(STDOUT);
group: while(<GRP>) {
if (/^#/) {
# leave the comment as is
print NGRP $_;
next group;
}
chop;
($grp, $lgot) = split;
print(S "group $grp\n");
#this select will block until the server gives us something.
$nfound = select($rout=$rin, undef, undef, $waittime);
if ($nfound == 0)
{
print "Socket timed out...";
exit 1;
}
$_ = <S>; #Make sure the group change worked...
($stat, $num, $first, $last) = split;
if( $stat !~ /^2../ )
{
warn "Bad group $grp: $_";
print(NGRP "$grp $lgot\n");
next group;
}
#
# create group directories, if necessary
#
-d "groups" || mkdir("groups",0777) ||
die "Couldn't create directory groups: $!" ;
-d "groups/$grp" || mkdir("groups/$grp",0777) ||
die "Couldn't create directory groups/$grp: $!" ;
-d "indices" || mkdir("indices",0777) ||
die "Couldn't create directory indices: $!" ;
-d "indices/$grp" || mkdir("indices/$grp",0777) ||
die "Couldn't create directory indices/$grp: $!" ;
#
# access kill-file (in directory $HOME/News)
#
$dir = $kill_location;
$killfile = $grp;
$killfile =~ s|\.|/|g;
$killfile = "$dir/$killfile/KILL";
if (open(KILL, $killfile))
{
@karray = ();
while (<KILL>) {
($dummy,$pattern) = split(m|/|);
push(@karray,$pattern) if $pattern;
}
} else {
$killfile = undef;
}
close(KILL);
if ( $first > $lgot )
{
$lgot = $first;
}
$count = 0;
if ( $lgot < $last )
{
article: foreach $art ($lgot..$last)
{
print(S "article $art\n");
#this select will block until the server gives us something.
$nfound = select($rout=$rin, undef, undef, $waittime);
if ($nfound == 0)
{
print "Socket timed out...";
exit 1;
}
$_ = <S>; #get error if one exists
if(!/^2../)
{
warn "No article $art in $grp\n";
next article;
}
# We now slurp the whole article into the array article...
# HMMM is this good or bad...
# It gives me the WILLIES [:^) Jeffrey B. McGough
@article = ();
do {
# The next few lines have been commented out because they don't work
# JBM 07-27-92
# $nfound = select($rout=$rin, undef, undef, $waittime);
# if ($nfound == 0)
# {
# print "Socket timed out...";
# exit 1;
# }
$lgot = $art;
$_ = <S>;
s/\r//g;
if( $_ ne ".\n") {
push(@article,$_);
} else {
push(@article,"\n");
}
} until $_ eq ".\n";
if ( !&desc ) {
# header matches kill-file
next article;
}
++$count;
++$totalcount;
}
} else {
$lgot -= 1;
}
$lgot += 1;
print(NGRP "$grp $lgot\n");
print "$grp: $count new articles\n";
if ($count>0) {
$cmd = "exec $GLIMPSEIDX_LOC -o -z -H indices/$grp ".
"groups/$grp >/dev/null";
system "$cmd";
}
}
close(NGRP);
close(GRP);
close(IDXFILE);
if ($totalcount>0) {
$cmd = "build_idx &";
system "$cmd";
} else {
unlink("$newarticles");
}
rename ($newsfile, "$newsfile.old") ||
warn ("Unable to rename $newsfile to ${newsfile}.old\n");
rename ($nnewsfile, $newsfile) ||
warn ("Unable to rename ${nnewsfile} to ${newsfile}\n");
print( S "quit\n");
close(S);
# We parse through @article to extract header information
# and then save the article
# Returns article no, or empty string if article is to be killed
sub desc
{
local($pattern,$author,$subject,$ID,$date,$filename);
# global parameters: $grp, $art, @article, @karray
scan: foreach (@article) {
last scan if /^\n$/;
foreach $pattern (@karray) {
if (/$pattern/i) {
return undef;
}
}
s/\s+/ /;
if ( /^From: (.*)/ ) {
$author = $1;
if ($author =~ /([\w\d][-+\w\d.]*@[\w\d][-\w\d.]*)/) {
$address = $1;
} else {
$address = $author;
}
} elsif (/^Message-ID: \<?([^\s\>]*)/) {
$ID=$1;
} elsif (/^Subject: (.*)/) {
$subject=$1;
} elsif (/^Date: (.*)/) {
$date=$1;
}
}
# good article, now open output file...
$filename = "/$grp/$art";
$file = "groups$filename";
open(OUTFILE, ">$file") ||
die "Could not open $file";
print OUTFILE @article;
close(OUTFILE);
if ($grp =~ /soc.culture/) {
system("$gzip $file");
$filename .= ".gz";
}
# Write header information
print IDXFILE "$filename\t$ID\t$address\t$author\t$subject\t$date\n";
return $art;
}